*****************************************************************
* Replication directory for                                   ***
* Prime locations                                             ***
* by Gabriel M. Ahlfeldt, Thilo N.H. Albers, Kristian Behrens ***
* Published in American Economic Review: Insights             ***
*****************************************************************
* 01/2025
* Stata
version 17.0
 
* This do files uses cluster generated in C++ (lines 2-17 of Algorithm 1)  to 
* complete the deliniation of prime locations (lines 18-34 of Algorithm 1) in Global cities
* This do file is run for various p-values (defined by gloabl $SL) 
 
* Create workspace
	capture mkdir "$temp/125CitiesPLs"
 
* LOAD LIST OF METROS **********************************************************
	qui import delimited "$data_125cities/METRO_LEVEL_COVARIATES/metrolist125.csv", clear
	qui tab metro_id
	local Nmetro = r(N) // Total number of cities in list
	local count = 1
	levelsof  metro_id, local(METROIDS)		
	
* LOOP OVER CITIES *************************************************************
	foreach num of local METROIDS { // loop over metro
	display "...start loop for metro `num'..."
* LOAD GRIDDED OFFICE DATA SET
	qui u "$temp/metro_emp_cluster.dta",clear
	qui keep if metro_id == `num'
	* Gen employment variable for code
		qui gen empl = cell_total_emp
		qui gen devle = developable
		qui format grid_x %9.0f
		qui format grid_y %9.0f
		qui drop if cell_id == ""
		qui sum metro_id 
		global MetroMax =  r(max)
		global MetroMin = r(min)
		
* Compute critical cluster measures
	qui replace clusterID = 0 if clusterID == . 								// Missings from metros that havent been processed
 
 
* Generate mass around clusters ************************************************
	qui gen temp_clusterID = clusterID if clusterID != 0						// temporary cluster ID just to have the levels excluding 0	
	levelsof temp_clusterID if metro_id == `num' , local(levels) 				// get list of PL IDs in a metro
	foreach pl of numlist `levels' {											// loop over PL IDs
		qui CM `num' `pl' 0
	}																			// PL loop closes
	qui drop temp_clusterID
	
* Cluster employment relative to max PL employment in city
	qui gen TrimmedCluster = clusterID > 0										// Trimmed cluster variable
	qui egen Cemp = sum(emp), by(metro_id clusterID)
	qui replace Cemp = Cemp+runiform()*0.1-0.05 								// add microscopic employment to break ties
	qui egen Carea = count(emp), by(metro_id clusterID)
	qui gen Cempdens = 	Cemp  / Carea	
	qui egen CempMax = max(Cemp) if clusterID != 0, by(metro_id)				// Employment of largest cluster in metro
	qui gen CempRel = Cemp/CempMax 												// Cluster employment relative to largest cluster in metro
	qui replace TrimmedCluster = 0 if CempRel < $PLtresh						// Restrict to clusters with minimum relative employment
	qui egen CareaMax = max(Carea) if clusterID > 0, by(metro_id)				// Gen max PL size to ensure we do not drop all PLs in next line
	qui replace TrimmedCluster=0 if Carea==1 & CareaMax > 1	& Cemp != CempMax	// Drop clusters that consist of one cell only
																				// except if cluster is the one with the maximum employment
	
* Ceneterate trimmed cluster variables	
	qui gen TrimmedClusterIDtemp = clusterID*TrimmedCluster if clusterID > 0 	// Trimmed clusters identified
	qui egen TrimmedClusterID = group(TrimmedClusterIDtemp) if TrimmedClusterIDtemp > 0
	qui qui drop TrimmedClusterIDtemp
	qui egen TrimmedClusterX = median(grid_x) if TrimmedCluster > 0, by(TrimmedClusterID metro_id)
	qui egen TrimmedClusterY = median(grid_y) if TrimmedCluster > 0, by(TrimmedClusterID metro_id)		
	
* Run program to aggregate cluster *********************************************
		local O = 999
		local i = 1
		while `O' > 0 {
		* display "...round `i' of city `num'..."
		qui gen TrimmedClusterIDold = TrimmedClusterID // save old ID
		qui AGGCLUSTER2 `num' $DISTtresh
		qui gen test = abs(TrimmedClusterIDold-TrimmedClusterID) // generate objective
		qui sum test
		local O = r(sum)
		qui drop TrimmedClusterIDold test
		local i = `i'+1
		}
		display("...aggregation done...")
 
* Generate PL ID
	qui capture gen PLID = .													// gen PL ID variable
	qui replace PLID = TrimmedClusterID 
 	qui gen N_PLS=.
	qui  distinct(PLID) if metro_id==`num'
	qui  replace N_PLS=r(ndistinct) if metro_id==`num'

* Add minimum mass if necessary
	levelsof PLID if metro_id == `num', local(levels) 							// get list of PL IDs in a metro
	foreach pl of numlist `levels' {											
		qui CH `num' `pl' 2
	}														
														
* Split prime locations if it consists of two remote parts
	qui SPLITTER $SplitDist
	
* DISCONNECT PLs if they are too large 
		levelsof PLID if metro_id == `num', local(levels) 						// get list of PL IDs in a metro
		foreach pl of numlist `levels' {										// loop over PL IDs
		DISCONNECT `pl' $DisconnectDISTthresh	
		}																		// PL loop closes	
	
* Set some missing employment data to zero
	qui replace empl = 0 if empl == .
	
* Generate PL variables	gen PL = PLID>0 & PLID != .
	qui gen PL = PLID>0 & PLID != .
	qui gen PL_x = grid_x if PL == 1
	qui gen PL_y = grid_y if PL == 1
	qui replace PL = 0 if PL == .
	qui egen PL_emp = sum(emp) if PL == 1,by(metro_id PLID)	// PL employment
	qui egen PL_area = sum(Carea) if PL == 1,by (metro_id PLID)	// PL employment
	qui replace PL_area = PL_area / 1000000
	qui gen PL_empdens = PL_emp/PL_area 
	qui qui egen PLX = median(grid_x) if PLID > 0 & PLID != ., by(PLID metro_id) // PL mean x coordinate
	qui qui egen PLY = median(grid_y) if PLID > 0 & PLID != ., by(PLID metro_id) // PL mean y coordinate			
	qui ren PLID PLID_metro
	qui egen PLID = group(PLID_metro metro_id) if PL == 1
	
* Save data set 
	capture mkdir "$temp/125CitiesPLs"
				qui tostring metro_id, gen(metro_id_string)
				qui tostring PLID , gen(PLIDstring)
				qui gen PLID_Global = metro_id_string+"_"+PLIDstring // Generate PLID for smooth merge to shapefiles
				qui drop metro_id_string PLIDstring
	qui save "$temp/125CitiesPLs/grid125_PL_output_`num'.dta", replace		
				qui egen Ranktemp = group(PL_emp)
				qui sum Ranktemp
				qui gen PLrankByCity = r(max)-Ranktemp+1
				qui drop Ranktemp
				qui keep PLID PLID_Global  lon_UL lat_UL lon_LR lat_LR metro_id  PLrankByCity
				qui keep if PLID != . 
	qui export delimited "$temp/125CitiesPLs/grid125_PL_output_`num'.csv", replace
	}
* Metro loop ends ..........................................................
	capture mkdir "$temp/PLtemp"
	preserve
		filelist, dir("$temp/PLtemp")
		levelsof filename, local(files)
		foreach f of local files{
			erase "$temp/PLtemp/`f'"
		}
	restore 	
	
* Append individual grids and finalize PLs
	clear
	foreach num of local METROIDS { // loop over metro
	append using "$temp/125CitiesPLs/grid125_PL_output_`num'.dta"
	}
	save "$temp/125CitiesPLs/grid125_PL_output.dta", replace
	
* Create pseudo PL and save
	capture mkdir "$temp/GIS_using"
	u "$temp/125CitiesPLs/grid125_PL_output.dta", clear	
	preserve 
	keep if PL == 1
	save "$temp/GIS_using/PL125_grids", replace
	restore 	
	
* Script ends
